{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SNIEiTSY08lf"
      },
      "source": [
        "# Welcome to Hanasu TTS Trainer"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "l4M-3d4zQTYC"
      },
      "outputs": [],
      "source": [
        "import zipfile\n",
        "\n",
        "# Unpack the 'wavs.zip' archive into the 'zipfolder' directory.\n",
        "with zipfile.ZipFile('wavs.zip', 'r') as archive:\n",
        "    archive.extractall('zipfolder')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8GnDPtUARGji"
      },
      "outputs": [],
      "source": [
        "# Install the Python requirements with %pip so they land in the running kernel's environment.\n",
        "%cd /content/drive/MyDrive/hanasu/hanasu\n",
        "%pip install -r requirements.txt\n",
        "# Build the monotonic_align extension in place.\n",
        "%cd /content/drive/MyDrive/vits2_pytorch-main/monotonic_align\n",
        "!python setup.py build_ext --inplace\n",
        "%cd ../\n",
        "# espeak-ng is a system package (apt); phonemizer is a Python package (pip).\n",
        "!apt-get update && apt-get install -y espeak-ng\n",
        "%pip install phonemizer"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "P2yS6_LM1bVd"
      },
      "source": [
        "# Preprocess the dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "qQ-GhNqynpzx"
      },
      "outputs": [],
      "source": [
        "from text import preprocess_filelists\n",
        "\n",
        "# Hand the transcript file to the project's filelist preprocessing step.\n",
        "transcript_filelists = [\"transcript.txt\"]\n",
        "preprocess_filelists(transcript_filelists)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Fix Bugs"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Pinned versions for the \"Fix Bugs\" step; %pip installs into the running kernel's environment.\n",
        "%pip install librosa==0.9.1\n",
        "%pip install tensorboard==2.12.0 tensorflow==2.12.0\n",
        "# matplotlib 3.7.0 fixes: 'FigureCanvasAgg' object has no attribute 'tostring_rgb'\n",
        "%pip install matplotlib==3.7.0\n",
        "%pip install numpy==1.26.4"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "directory = '/content/Yuna/wavs'\n",
        "# Gather every '*.mel.pt' file name in the directory, then delete each one.\n",
        "mel_files = [entry for entry in os.listdir(directory) if entry.endswith(\".mel.pt\")]\n",
        "for entry in mel_files:\n",
        "    os.remove(os.path.join(directory, entry))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Relocate every .wav file from one directory to another\n",
        "import shutil\n",
        "import os\n",
        "\n",
        "source_dir = 'source_folder'\n",
        "destination_dir = 'destination_folder'\n",
        "\n",
        "# Create the destination if it is missing (no error when it already exists)\n",
        "os.makedirs(destination_dir, exist_ok=True)\n",
        "\n",
        "# Collect the .wav names first, then move each one across\n",
        "wav_names = [entry for entry in os.listdir(source_dir) if entry.endswith(\".wav\")]\n",
        "for wav_name in wav_names:\n",
        "    shutil.move(os.path.join(source_dir, wav_name), os.path.join(destination_dir, wav_name))\n",
        "\n",
        "print(f\"Moved all wav files from {source_dir} to {destination_dir}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jPPDxdW51jck"
      },
      "source": [
        "# Training"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "%cd /content/drive/MyDrive/hanasu"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ljy_hDZCSE14"
      },
      "outputs": [],
      "source": [
        "!python train_ms.py -c /content/drive/MyDrive/hanasu/configs/config.json -m Yuna"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Release the Colab runtime once training has finished (Colab-only API).\n",
        "# The `os` module has no `runtime` attribute, so the shutdown must go through google.colab.\n",
        "from google.colab import runtime\n",
        "\n",
        "runtime.unassign()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XGE29b1q1g-x"
      },
      "source": [
        "# Inference"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "%cd ../hanasu"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from models import inference, load_model\n",
        "from scipy.io.wavfile import write\n",
        "import sounddevice as sd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3rXuDdi44xsi"
      },
      "outputs": [],
      "source": [
        "text =\"Text for generation.\"\n",
        "# Or you can read from a file:\n",
        "# with open(\"../output.txt\", 'r', encoding='utf-8') as f: text = f.read()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load the model from the config file and checkpoint onto the \"mps\" device.\n",
        "model = load_model(\n",
        "    \"./configs/config.json\",\n",
        "    \"/Users/yuki/Downloads/G_158000.pth\",\n",
        "    device=\"mps\",\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Non-streaming inference (stream=False)\n",
        "synthesis_options = dict(\n",
        "    noise_scale=0.2,\n",
        "    noise_scale_w=1.0,\n",
        "    length_scale=1.0,\n",
        "    device=\"mps\",\n",
        "    stream=False,\n",
        ")\n",
        "result = inference(model=model, text=text, **synthesis_options)\n",
        "\n",
        "# Save the returned audio as a WAV file at a 48000 Hz sample rate\n",
        "write(\"sample_vits2.wav\", 48000, result)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Streaming inference (stream=True): the result is consumed chunk by chunk\n",
        "streaming_options = dict(\n",
        "    noise_scale=0.17,\n",
        "    noise_scale_w=1.0,\n",
        "    length_scale=1.0,\n",
        "    device=\"mps\",\n",
        "    stream=True,\n",
        ")\n",
        "chunk_stream = inference(model=model, text=text, **streaming_options)\n",
        "\n",
        "for chunk in chunk_stream:\n",
        "    # Start playback of this chunk, then block until it has finished\n",
        "    sd.play(chunk, samplerate=48000)\n",
        "    sd.wait()\n",
        "    print(f\"Played chunk of {len(chunk)} samples\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Voice Conversion Inference\n",
        "from hanasu.models import voice_conversion_inference\n",
        "\n",
        "# Convert the recording in input.wav from speaker id 2 to speaker id 1\n",
        "conversion_options = dict(\n",
        "    source_wav_path=\"input.wav\",\n",
        "    source_speaker_id=2,\n",
        "    target_speaker_id=1,\n",
        "    device=\"mps\",\n",
        ")\n",
        "converted_audio = voice_conversion_inference(model=model, **conversion_options)\n",
        "\n",
        "# Write the converted audio out as a WAV file at a 48000 Hz sample rate\n",
        "write(\"voice_converted_audio.wav\", 48000, converted_audio)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Export"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from hanasu.onnx_utils import export_onnx, synthesize\n",
        "\n",
        "# Export to \"output.onnx\" from the given checkpoint and config file paths.\n",
        "export_onnx(\n",
        "    \"/Users/yuki/Downloads/G_15000.pth\",\n",
        "    \"/Users/yuki/Documents/Github/hanasu/hanasu/configs/config.json\",\n",
        "    \"output.onnx\",\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Run synthesis with the exported ONNX model and write the result to \"output.wav\".\n",
        "synthesize(\n",
        "    \"/Users/yuki/Documents/Github/hanasu/hanasu/output.onnx\",\n",
        "    \"/Users/yuki/Documents/Github/hanasu/hanasu/configs/config.json\",\n",
        "    \"output.wav\",\n",
        "    \"Hello, world!\",\n",
        "    sid=0,\n",
        "    scales=[0.2, 1.0, 1.0],\n",
        ")"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "L4",
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "yuna",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.15"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
